Overview of outcome of variables

Outcome by county

point <- format_format(big.mark = " ", decimal.mark = ",", scientific = FALSE)
    
demo_plot = demo %>%
    filter(!county_name %in% "Out of state") %>%
    group_by(county_name) %>% 
    summarize(total_cases = sum(reported_cases),
              total_deaths = sum(reported_deaths)) %>% 
    mutate(county_name = fct_reorder(county_name, total_cases)) %>% 
    ggplot(aes(x = county_name, y = total_cases, color = county_name)) +
    geom_point(alpha = 1) +
    theme(
      legend.position = "none"
    ) +
    xlab("County") +
    ylab("Total cases") +
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
    scale_y_continuous(labels = point)
    # theme(
    #       axis.title.x = element_text(size = 6),
    #       axis.text.x = element_text(size = 5),
    #       axis.title.y = element_text(size = 6),
    #       axis.text.y = element_text(size = 5)) +
    #       scale_y_continuous(labels = point)
ggplotly(demo_plot, width = 800, height = 500)
point <- format_format(big.mark = " ", decimal.mark = ",", scientific = FALSE)
    
    demo %>% 
    filter(!county_name %in% "Out of state") %>%
    group_by(county_name) %>% 
    summarize(total_cases = sum(reported_cases),
              total_deaths = sum(reported_deaths)) %>% 
    mutate(county_name = fct_reorder(county_name, total_cases)) %>%
    ggplot(aes(x = total_cases, fill = county_name)) +
    geom_histogram(aes(y = ..density..), alpha = 0.5, bins = 50) +
    geom_density(alpha = 0.5, aes(color = county_name)) +
    labs(x = "Total cases",
         y = "Density") +
    theme(
      legend.position = "bottom"
    ) + 
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
   theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
          scale_y_continuous(labels = point) +
          scale_x_continuous(labels = point)

Worst counties for each COVID outcome

demo_worst =
    demo %>% 
    group_by(county_name, ) %>% 
    summarize(total_cases = sum(reported_cases),
              total_deaths = sum(reported_deaths)) %>%
    select(county_name, total_cases) %>% 
    arrange(desc(total_cases)) 

    head(demo_worst, 10) %>% 
    knitr::kable(
    caption = "Worst countries for each COVID outcome",
    col.names = c("County", "Total cases"),
    digits = 2
  )
Worst countries for each COVID outcome
County Total cases
Los Angeles 3332327
San Diego 938583
Riverside 704180
San Bernardino 679620
Orange 676959
Santa Clara 442182
Alameda 356253
Sacramento 353538
Fresno 278793
Contra Costa 256191

Best counties for each COVID outcome

demo_best =
    demo %>% 
    group_by(county_name, ) %>% 
    summarize(total_cases = sum(reported_cases),
              total_deaths = sum(reported_deaths)) %>%
    select(county_name, total_cases) %>% 
    arrange(total_cases)

    head(demo_best, 10) %>% 
    knitr::kable(
    caption = "Best countries for each COVID outcome",
    col.names = c("County", "Total cases"),
    digits = 2
  )
Best countries for each COVID outcome
County Total cases
Alpine 138
Sierra 308
Modoc 895
Trinity 1425
Mariposa 3156
Mono 3233
Plumas 3737
Colusa 3840
Inyo 4557
Siskiyou 5385
point <- format_format(big.mark = " ", decimal.mark = ",", scientific = FALSE)

demo_worst_p =
    demo_worst %>% 
    filter(total_cases >= 256191) %>% 
    ggplot(aes(x = county_name, y = total_cases, fill = county_name)) +
    geom_bar(stat = "identity", width = 0.5) +
    theme(
      legend.position = "none"
    ) +
    xlab("") +
    ylab("Total cases") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(axis.text.x = element_text(angle = 60, hjust = 1))+
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
          scale_y_continuous(labels = point) 

demo_best_p =
    demo_best %>% 
    filter(total_cases <= 5385) %>% 
    ggplot(aes(x = county_name, y = total_cases, fill = county_name)) +
    geom_bar(stat = "identity", width = 0.5) +
    theme(
      legend.position = "none"
    ) +
    xlab("") +
    ylab("Total cases") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
          scale_y_continuous(labels = point)

demo_worst_p + demo_best_p

Age vs Total Cases

point <- format_format(big.mark = " ", decimal.mark = ",", scientific = FALSE)
grid.arrange(
 
    age %>% 
    ggplot(aes(x = age_group, y = total_cases, fill = age_group)) +
    geom_bar(stat = "identity", width = 0.5) +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Age") +
    ylab("Total cases") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
          scale_y_continuous(labels = point),
    

    age %>% 
    ggplot(aes(x = total_cases, fill = age_group)) +
    geom_histogram(aes(y = ..density..), alpha = 0.5, bins = 50) +
    geom_density(alpha = 0.3, aes(color = age_group)) +
    scale_color_viridis(discrete = TRUE) +
    labs(x = "Total cases",
         y = "Density") +
    theme(legend.position = "right") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
    scale_x_continuous(labels = point) +
    scale_y_continuous(labels = point),
    
    
    age %>%
    ggplot(aes(x = age_group, y = total_cases, fill = age_group)) +
    geom_boxplot(alpha = 0.5) +
    geom_hline(yintercept = median(age$total_cases, na.rm = T), color = "red", size = 0.4, lty = "dashed") +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Age") +
    ylab("Total cases") +
        theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
          scale_y_continuous(labels = point),


layout_matrix = rbind(c(1, 1, 1, 2, 2, 2, 2),
                      c(1, 1, 1, 2, 2, 2, 2),
                      c(1, 1, 1, 2, 2, 2, 2),
                      c(3, 3, 3, 2, 2, 2, 2),
                      c(3, 3, 3, 2, 2, 2, 2)
))

# Highest Total cases
age_17 = 
  age %>% 
  filter(age_group == "0-17") %>% 
  arrange(date) %>% 
  mutate(lag = lag(total_cases)) %>% 
  mutate(growth_perc = (((total_cases - lag) / total_cases)) * 100)


age_49 =
  age %>% 
  filter(age_group == "18-49") %>% 
  arrange(date) %>% 
  mutate(lag = lag(total_cases)) %>% 
  mutate(growth_perc = (((total_cases - lag) / total_cases)) * 100) 

age_64 = 
  age %>% 
  filter(age_group == "50-64") %>% 
  arrange(date) %>% 
  mutate(lag = lag(total_cases)) %>% 
  mutate(growth_perc = (((total_cases - lag) / total_cases)) * 100)

age_65 =
  age %>% 
  filter(age_group == "65+") %>% 
  arrange(date) %>% 
  mutate(lag = lag(total_cases)) %>% 
  mutate(growth_perc = (((total_cases - lag) / total_cases)) * 100)

age_all = 
    rbind(age_17, age_49, age_64, age_65) %>% 
    arrange(desc(growth_perc)) %>% 
    select(date, age_group, total_cases, growth_perc)

head(age_all) %>% 
    knitr::kable(
    caption = "Highest total cases growth rate by age",
    col.names = c("Date", "Age", "Total cases", "Growth rate"),
    digits = 2
  )
Highest total cases growth rate by age
Date Age Total cases Growth rate
2020-04-29 0-17 1398 9.66
2020-04-27 0-17 1190 8.82
2020-04-23 0-17 936 8.65
2020-05-05 0-17 1937 8.47
2022-01-09 0-17 945336 8.29
2022-01-16 0-17 1158294 7.62
age_all_low = 
    rbind(age_17, age_49, age_64, age_65) %>% 
    arrange(growth_perc) %>% 
    select(date, age_group, total_cases, growth_perc)

head(age_all_low) %>% 
    knitr::kable(
    caption = "Lowest total cases growth rate by age",
    col.names = c("Date", "Age", "Total cases", "Growth rate"),
    digits = 2
  )
Lowest total cases growth rate by age
Date Age Total cases Growth rate
2021-06-29 65+ 391708 -0.17
2021-06-29 50-64 703990 -0.13
2021-06-29 18-49 2127853 -0.11
2021-06-29 0-17 484599 -0.10
2021-04-23 65+ 384595 -0.01
2020-12-23 0-17 234174 0.00

[Text] Note: the red line of boxplot is median

Gender vs Total Cases

point <- format_format(big.mark = " ", decimal.mark = ",", scientific = FALSE)
grid.arrange(
 
    gender %>% 
    ggplot(aes(x = gender, y = total_cases, fill = gender)) +
    geom_bar(stat = "identity", width = 0.5) +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Gender") +
    ylab("Total cases") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
    scale_y_continuous(labels = point),

    gender %>% 
    ggplot(aes(x = total_cases, fill = gender)) +
    geom_histogram(aes(y = ..density..), alpha = 0.5, bins = 30) +
    geom_density(alpha = 0.3, aes(color = gender)) +
    scale_color_viridis(discrete = TRUE) +
    labs(x = "Total cases",
         y = "Density") +
    theme(legend.position = "right") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
    scale_x_continuous(labels = point) +
    scale_y_continuous(labels = point) +
    scale_fill_viridis(discrete = TRUE),
    
    
    gender %>%
    ggplot(aes(x = gender, y = total_cases, fill = gender)) +
    geom_boxplot(alpha = 0.5) +
    geom_hline(yintercept = median(gender$total_cases, na.rm = T), color = "red", size = 0.4, lty = "dashed") +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Gender") +
    ylab("Total cases") +
        theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
          scale_y_continuous(labels = point),


layout_matrix = rbind(c(1, 1, 1, 2, 2, 2, 2),
                      c(1, 1, 1, 2, 2, 2, 2),
                      c(1, 1, 1, 2, 2, 2, 2),
                      c(3, 3, 3, 2, 2, 2, 2),
                      c(3, 3, 3, 2, 2, 2, 2)
))

gender_M =
  gender %>% 
  filter(gender == "Male") %>% 
  arrange(date) %>% 
  mutate(lag = lag(total_cases)) %>% 
  mutate(growth_perc = (((total_cases - lag) / total_cases)) * 100)

gender_F =
  gender %>% 
  filter(gender == "Female") %>% 
  arrange(date) %>% 
  mutate(lag = lag(total_cases)) %>% 
  mutate(growth_perc = (((total_cases - lag) / total_cases)) * 100)

gender_all =
    rbind(gender_M, gender_F) %>%
    arrange(desc(growth_perc)) %>% 
    select(date, gender, total_cases, growth_perc)

head(gender_all) %>% 
    knitr::kable(
    caption = "Highest total cases growth rate by gender",
    col.names = c("Date", "Gender", "Total cases", "Growth rate"),
    digits = 2
  )
Highest total cases growth rate by gender
Date Gender Total cases Growth rate
2020-04-29 Male 24372 5.44
2022-01-09 Female 3034425 5.31
2020-04-23 Female 19394 5.15
2020-04-24 Female 20395 4.91
2022-01-09 Male 2813232 4.89
2022-01-16 Female 3445681 4.80
gender_all_1 =
    rbind(gender_M, gender_F) %>%
    arrange(growth_perc) %>% 
    select(date, gender, total_cases, growth_perc)
head(gender_all_1) %>% 
    knitr::kable(
    caption = "Lowest total cases growth rate by gender",
    col.names = c("Date", "Gender", "Total cases", "Growth rate"),
    digits = 2
  )
Lowest total cases growth rate by gender
Date Gender Total cases Growth rate
2021-06-29 Male 1774418 -0.12
2021-06-29 Female 1884983 -0.11
2020-12-23 Male 945758 0.00
2020-12-30 Male 1064781 0.00
2020-12-23 Female 993649 0.00
2020-12-30 Female 1121071 0.00

[Text]

Race vs Total Cases

point <- format_format(big.mark = " ", decimal.mark = ",", scientific = FALSE)
grid.arrange(
 
    race %>%
    mutate(race_group = recode(race_group, "American Indian or Alaska Native" = "Indian/Alaska",
           "Native Hawaiian and other Pacific Islander" = "Hawaiian/Islander")) %>% 
    mutate(race_group = fct_reorder(race_group, total_cases)) %>%
    ggplot(aes(x = race_group, y = total_cases, fill = race_group)) +
    geom_bar(stat = "identity", width = 0.5) +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Race") +
    ylab("Total cases") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
    scale_y_continuous(labels = point) +
    theme(axis.title.x = element_blank(),
         axis.text.x = element_blank(),
         axis.ticks.x = element_blank()),
    

    race %>% 
    mutate(race_group = recode(race_group, "American Indian or Alaska Native" = "Indian/Alaska",
           "Native Hawaiian and other Pacific Islander" = "Hawaiian/Islander")) %>% 
    mutate(race_group = fct_reorder(race_group, total_cases)) %>%
    ggplot(aes(x = total_cases, fill = race_group)) +
    geom_histogram(aes(y = ..density..), alpha = 0.5, bins = 30) +
    geom_density(alpha = 0.1, aes(color = race_group)) +
    scale_color_viridis(discrete = TRUE) +
    labs(x = "Total cases",
         y = "Density") +
    theme(legend.position = "right") +
    theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) + 
    xlim(0, 2.5e6) +   
    ylim(0, 1.5e-5),
   
    


race %>%
    mutate(race_group = fct_reorder(race_group, total_cases)) %>% 
    mutate(race_group = recode(race_group, "American Indian or Alaska Native" = "Indian/Alaska",
           "Native Hawaiian and other Pacific Islander" = "Hawaiian/Islander")) %>% 
    ggplot(aes(x = race_group, y = total_cases, fill = race_group)) +
    geom_boxplot(alpha = 0.5) +
    geom_hline(yintercept = median(race$total_cases, na.rm = T), color = "red", size = 0.4, lty = "dashed") +
    ylim(0, 30000) +
    scale_fill_viridis(discrete = TRUE) +
    theme(
      legend.position = "none"
    ) +
    xlab("Race") +
    ylab("Total cases") +
        theme(legend.title = element_text(size = 5),
          legend.key.size = unit(0.3, 'cm'),
          legend.text = element_text(size = 4)) +
    theme(
          axis.title.x = element_text(size = 6),
          axis.text.x = element_text(size = 5),
          axis.title.y = element_text(size = 6),
          axis.text.y = element_text(size = 5)) +
          scale_y_continuous(labels = point) +
    theme(axis.text.x = element_text(angle = 60, hjust = 1)),
 


layout_matrix = rbind(c(1, 1, 1, 2, 2, 2, 2),
                      c(1, 1, 1, 2, 2, 2, 2),
                      c(1, 1, 1, 2, 2, 2, 2),
                      c(3, 3, 3, 2, 2, 2, 2),
                      c(3, 3, 3, 2, 2, 2, 2)
))

(table for race and demo have not finished yet)

[Text]

Death Rate

Age vs Death Rate

age %>% 
    mutate(date = factor(date)) %>%
    mutate(text_label = str_c("Date: ", date, 
                              "\n Age: ", age_group,
                              "\n Death(%): ", percent_deaths)) %>%
    plot_ly(y = ~percent_deaths, 
          x = ~date, 
          color = ~age_group, 
          width = 950,
          height = 300, 
          type = "scatter",
          mode = "markers",
          marker = list(size = 3),
          colors = "inferno",
          text = ~ text_label) %>%
    layout(xaxis = list(
           title = "Date",
           tickangle = 60),
           yaxis = list(
           title = "Death Rate"))
<<<<<<< HEAD
=======
>>>>>>> e6526255c1f2aa5c6ee6a707b36bad8ccd212957

[Text]

Gender vs Death Rate

gender %>% 
    mutate(date = factor(date)) %>%
    mutate(text_label = str_c("Date: ", date, 
                              "\n Gender: ", gender,
                              "\n Death(%): ", percent_deaths)) %>%
    plot_ly(y = ~percent_deaths, 
          x = ~date, 
          color = ~gender, 
          width = 950,
          height = 300, 
          type = "scatter",
          mode = "markers",
          marker = list(size = 3),
          colors = "viridis",
          text = ~ text_label) %>%
    layout(xaxis = list(
           title = "Date",
           tickangle = 60),
           yaxis = list(
           title = "Death Rate"))
<<<<<<< HEAD
=======
>>>>>>> e6526255c1f2aa5c6ee6a707b36bad8ccd212957

[Text]

Race vs Death Rate

race %>% 
    mutate(date = factor(date)) %>%
    mutate(race_group = recode(race_group, "American Indian or Alaska Native" = "Indian/Alaska",
           "Native Hawaiian and other Pacific Islander" = "Hawaiian/Islander")) %>% 
    mutate(text_label = str_c("Date: ", date, 
                              "\n Race: ", race_group,
                              "\n Death(%): ", percent_deaths)) %>%
    plot_ly(y = ~percent_deaths, 
          x = ~date, 
          color = ~race_group, 
          width = 950,
          height = 300, 
          type = "scatter",
          mode = "markers",
          marker = list(size = 3),
          colors = "inferno",
          text = ~ text_label) %>%
    layout(xaxis = list(
           title = "Date",
           tickangle = 60),
           yaxis = list(
           title = "Death Rate"))
<<<<<<< HEAD

[Text]

=======

[Text]

Area vs Death Rate

demo %>% 
    mutate(percent_deaths = (cumulative_deaths / cumulative_cases) * 100) %>%
    mutate(date = factor(date)) %>%
    mutate(text_label = str_c("Date: ", date, 
                              "\n Area: ", county_name,
                              "\n Death(%): ", percent_deaths)) %>%
    plot_ly(y = ~percent_deaths, 
          x = ~date, 
          color = ~county_name , 
          width = 950,
          height = 500, 
          type = "scatter",
          mode = "markers",
          marker = list(size = 3),
          colors = "inferno",
          text = ~ text_label) %>%
    layout(xaxis = list(
           title = "Date",
           tickangle = 60),
           yaxis = list(
           title = "Death Rate",
           range = c(0, 13)))

[Text: Note: states users can use our dashboard to research this]

>>>>>>> e6526255c1f2aa5c6ee6a707b36bad8ccd212957